# -*- coding: utf-8 -*-
"""
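Bagging and pasting: fit a bagging ensemble of decision trees on the
make_moons toy dataset and print its score on a held-out test set.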
Created on Sat Apr 28 15:00:06 2018

@author: Allen
"""
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets

# Toy two-class dataset: 500 noisy points, seeded so every run sees the same data.
X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Draw each class with its own scatter call; X[mask].T yields the
# (first-feature, second-feature) rows that scatter expects as x and y.
plt.scatter(*X[y == 0].T)
plt.scatter(*X[y == 1].T)
plt.show()


from sklearn.model_selection import train_test_split
# Hold out part of the data for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=666,
)

'''
When an ensemble has to combine hundreds or thousands of sub-models,
decision trees are the first choice for the base model.
'''
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

# Bagging ensemble of decision trees.
#   n_estimators: how many sub-models (trees) are combined.
#   max_samples:  how many training samples each sub-model is fitted on
#                 (an int is an absolute count).
#   bootstrap:    True draws those samples with replacement (bagging);
#                 False would draw without replacement (pasting).
#   random_state: fixed so that the sampling, and therefore the printed
#                 score, is reproducible -- consistent with the seeded data
#                 generation and train/test split earlier in this script.
bagging_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    random_state=666,
)
bagging_clf.fit(X_train, y_train)

# Score on the held-out test set. The original, unseeded run was annotated
# with 0.872; the exact value depends on the random seed, so re-check it
# before quoting a number.
print(bagging_clf.score(X_test, y_test))